## Do the matching on DA. Perceptions here are of own DA.

options(scipen = 8, digits = 4, width = 132)

library(here)
library(RItools)
library(dplyr)
library(designmatch)

source(here::here("Design", "nonbimatchingfunctions.R"))

load(here::here("Data", "wrkdat_DA0_new.rda"), verbose = TRUE)
load(here::here("Design", "dist_mats_DA_new.rda"), verbose = TRUE)

wdat0 <- wrkdat_DA0_new %>%
  dplyr::select(-geometry) %>%
  filter(!is.na(vm_change) & !is.na(social.capital01) & !is.na(community.resp01) & !is.na(vm)) %>%
  droplevels()
dim(wdat0)

## Make simple objective distance (maybe move this to
## dist_mats_data_.... because it overwrites that vmdaDist object)

vmdaDist <- scalar.dist(wdat0$da_prop_vm_20pct_06, scalefactor = 100000) ## the matching software wants integers
dimnames(vmdaDist) <- list(row.names(wdat0), row.names(wdat0))

## What are the kinds of differences we would see within pair?
## to create near and far criteria for the matching
perceptions_mat <- scalar.dist(wdat0$vm, int = FALSE)
vm_change_mat <- scalar.dist(wdat0$vm_change, int = FALSE)
area_mat <- scalar.dist(wdat0$community_area_km, int = FALSE)
quantile(as.vector(perceptions_mat), seq(0, .1, .01))
quantile(as.vector(vm_change_mat), seq(0, 1, .1))
quantile(as.vector(area_mat), seq(0, 1, .1))
quantile(as.vector(area_mat), seq(.9, 1, .01))
summary(wdat0$da_popdens_16)
quantile(wdat0$da_popdens_16, seq(0, 1, .1))

## Match exactly within csd type
canada_exact <- list(covs = as.matrix(wdat0$csd_urban_06))

## Require matches to be within .02 on da vm and .03 on change in vm 2016 - 2006
canada_nearlist <- list(
  covs = as.matrix(wdat0[, c("da_prop_vm_20pct_06", "vm_change")]),
  pairs = c(
    da_prop_vm_20pct_06 = .02,
    vm_change = .03
  )
)

## What is the smallest non zero difference in perceptions --- for the "far" constraint
## i'm using min/10 just in case there was some rounding in the perceptions_mat cal
min_perc_dist <- min(as.vector(perceptions_mat)[as.vector(perceptions_mat) != 0])
canada_farlist <- list(
  covs = as.matrix(wdat0[, "vm"]),
  pairs = c(vm = min_perc_dist / 10)
)

solverlist <- list(name = "gurobi", approximate = 1, t_max = 1000, trace = 1)

system.time(
  res <- nmatch(
    dist_mat = vmdaDist,
    near = canada_nearlist,
    far = canada_farlist,
    exact = canada_exact,
    total_pairs = 270,
    # subset_weight = 1,
    solver = solverlist
  )
)

nrow(wdat0)
length(res$group_id)
nrow(wdat0) - length(res$group_id)
## Dropped:
## 120 with subset_weight=10000 p=.67
## 180 with subset_weight=1000 p=.70
## 122 with subset_weight=NULL and total_pairs=282 p=.64
## 126 with subset_weight=NULL and total_pairs=280 p=.65
## 134 with subset_weight=NULL and total_pairs=270 p=.85
## 158 with subset_weight=NULL and total_pairs=260 p=.58
## 174 with subset_weight=NULL and total_pairs=250 p=.58
## 400 with subset_weight=100 p=.77
## 530 with subset_weight=10 p=.20
## 532 with subset_weight=1 p=.81

## Convert nmatch output into a vector for use in analysis
wdat0$id <- row.names(wdat0)
canada_pairs_df <- nmatch_to_df(res, origid = wdat0$id)
# The nmatch_to_df function creates a column labeled "pair" which contains
## the matched set indicator
wdat <- inner_join(wdat0, canada_pairs_df, by = "id")
wdat <- droplevels(wdat)
stopifnot(nrow(wdat) == nrow(canada_pairs_df))
stopifnot(length(unique(wdat$pair)) == nrow(wdat) / 2)
nrow(canada_pairs_df)
nrow(wdat)
nrow(wdat0)

## Checking to see that we didn't match any two people with identical perceptions
pairpercdiffs <- tapply(wdat$vm, wdat$pair, function(x) {
  abs(diff(x))
})
stopifnot(min(pairpercdiffs) > 0)

## This next looks at the continuous relationship with the variables that we
## really want to hold constant

thecovs <- c("da_prop_vm_20pct_06", "vm_change")
balfmla <- reformulate(thecovs, response = "vm")

xb1 <- xBalance(balfmla, strata = list(pair = ~pair), report = "all", data = wdat)
xb1$overall
xb1$results[, c("z", "p"), "pair"]

## Easier to think about "higher perceiver" versus "lower perceiver" when it
## comes to asking whether the matching worked.

wdat <- wdat %>%
  group_by(pair) %>%
  mutate(perc_more = rank(vm) - 1) %>%
  ungroup()

## Further simplify the working file
wdat1 <- wdat %>%
  mutate(pairF = factor(pair)) %>%
  dplyr::select(
    perc_more, da_prop_vm_20pct_06, vm_change, pair, pairF, social.capital01, community.resp01,
    vm, vcid, dauid, csd_pop_06, csd_prop_vm_20pct_06,
    da_pop_dens_06, csd_urban_06, csd_pop_dens_06, community_area_km
  )

xb1_ranked0 <- xBalance(perc_more ~ da_prop_vm_20pct_06 + vm_change,
  strata = list(unstr = NULL, pair = ~pair), report = "all", data = wdat
)
xb1_ranked0$overall
xb1_ranked0$results

nrow(wdat)
nrow(wdat0)
nrow(wdat0) - nrow(wdat)
xb1_ranked0$overall

balfmla_ranked <- update(balfmla, perc_more ~ .)
xb1_ranked <- balanceTest(update(balfmla_ranked, . ~ . + strata(pair)), data = wdat1, p.adjust = "none")
xb1_ranked$overall
xb1_ranked$results[, c("Control", "Treatment", "adj.diff", "std.diff", "p"), ] # , "pair"]

## An alternative for the overall test which should be fairly close to it
library(formula.tools)
library(coin)
coin_fmla <- ~ vm | pairF
lhs(coin_fmla) <- rhs(balfmla)
coin_test <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic")
coin_test_perm <- independence_test(coin_fmla, data = wdat1, teststat = "quadratic", distribution = approximate(nresample = 1000))
coin::pvalue(coin_test)
coin::pvalue(coin_test_perm)

coin_fmla_rank <- da_prop_vm_20pct_06 + vm_change ~ perc_more | pairF
coin_test_rank <- independence_test(coin_fmla_rank, data = wdat1, teststat = "quadratic")
coin_test_rank_perm <- independence_test(coin_fmla_rank,
  data = wdat1,
  teststat = "quadratic", distribution = approximate(nresample = 1000)
)
coin::pvalue(coin_test_rank)
coin::pvalue(coin_test_rank_perm)

pairdiffs <- wdat1 %>%
  group_by(pair) %>%
  summarize(
    da_prop_vm_20pct_06 = abs(diff(da_prop_vm_20pct_06)),
    vm_change = abs(diff(vm_change)),
    vm = abs(diff(vm)),
    csd_pop_06 = abs(diff(csd_pop_06)),
    csd_pop_dens_06 = abs(diff(csd_pop_dens_06)),
    csd_prop_vm_20pct_06 = abs(diff(csd_prop_vm_20pct_06)),
    # da_pop_dens_06 = abs(diff(da_pop_dens_06)),
    community.area.km = abs(diff(community_area_km))
  )

sapply(pairdiffs, summary)

sapply(pairdiffs, function(x) {
  quantile(x, sort(unique(c(seq(0, 1, .1), seq(.9, 1, .01)))), na.rm = TRUE)
})

matches_DA_new <- wdat1[, c("vcid", "pair")]

save(matches_DA_new, file = here::here("Design", "matches_DA_new.rda"))
